Renewable Energy Data EDA¶

Import to dataframes¶

In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
import folium
import seaborn as sns


# Postcode data (SSI = small scale installations, SGU = small generation unit, SWH = Solar water heating )
df_SSI_SGU_Hydro = pd.read_csv("data/PostCode Data/Postcode data for small-scale installations - SGU-Hydro.csv")
df_SSI_SGU_Solar = pd.read_csv("data/PostCode Data/Postcode data for small-scale installations - SGU-Solar.csv")
df_SSI_SGU_Wind = pd.read_csv("data/PostCode Data/Postcode data for small-scale installations - SGU-Wind.csv")
df_SSI_SWH_Air_source_heat_pump = pd.read_csv("data/PostCode Data/Postcode data for small-scale installations - SWH-Air-source-heat-pump.csv")
df_SSI_SWH_Solar = pd.read_csv("data/PostCode Data/Postcode data for small-scale installations - SWH-Solar.csv")

# Battery installations with SSI by state
df_battery_installations = pd.read_csv("data/PostCode Data/State data for battery installations with small-scale systems.csv")

# Australian postcode data reference dataset
df_AU_postcode_dataset = pd.read_excel("data/australian_postcodes.xlsx")

# Whole REC database (REC = renewable energy certificate)
df_REC_database = pd.read_csv("data/wholeRECdatabase_20230120_no_dups.csv",encoding='cp1252')
In [ ]:
df_list = [
df_SSI_SGU_Hydro,
df_SSI_SGU_Solar,
df_SSI_SGU_Wind,
df_SSI_SWH_Air_source_heat_pump,
df_SSI_SWH_Solar,
df_battery_installations,
df_AU_postcode_dataset,
df_REC_database
]

for idx, df in enumerate(df_list, start=1):
    print(f"DataFrame {idx} - Name: {df.name if hasattr(df, 'name') else 'Unnamed'}, Shape: {df.shape}")
DataFrame 1 - Name: Unnamed, Shape: (20, 7)
DataFrame 2 - Name: Unnamed, Shape: (2806, 41)
DataFrame 3 - Name: Unnamed, Shape: (253, 5)
DataFrame 4 - Name: Unnamed, Shape: (2610, 21)
DataFrame 5 - Name: Unnamed, Shape: (2964, 21)
DataFrame 6 - Name: Unnamed, Shape: (10, 10)
DataFrame 7 - Name: Unnamed, Shape: (18513, 33)
DataFrame 8 - Name: Unnamed, Shape: (6271416, 17)
In [ ]:
df_REC_database.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6271416 entries, 0 to 6271415
Data columns (total 17 columns):
 #   Column                       Dtype  
---  ------                       -----  
 0   Status                       object 
 1   Owner                        object 
 2   Accreditation_Code           object 
 3   Fuel_Source_Active           bool   
 4   Fuel_Source_Type             object 
 5   Fuel_Source_Display_Name     object 
 6   Generation_Year              int64  
 7   Status.1                     object 
 8   Start_Serial                 float64
 9   End_Serial                   float64
 10  State                        object 
 11  Owner_Name                   object 
 12  Creation_Year                int64  
 13  Range_ID                     int64  
 14  Creation_Date                object 
 15  Public_Registered_Person_ID  int64  
 16  Created_By                   object 
dtypes: bool(1), float64(2), int64(4), object(10)
memory usage: 771.5+ MB
In [ ]:
df_compare_systems = pd.DataFrame([], columns=['Type','Locations','System Quantity', 'KW'])
In [ ]:
sgu_hydro = df_SSI_SGU_Hydro["Small Unit Installation Postcode"].drop_duplicates()

postcodes = sgu_hydro.shape[0]
quantity = sum(df_SSI_SGU_Hydro["Previous Years (2001-2021) - Installation Quantity"])
kw = round(sum(df_SSI_SGU_Hydro["Previous Years (2001-2021) - SGU Output Rated Output In kW"]),2)

print(postcodes, "unique hydro postcodes")
print(quantity, "total hydro units")
print(kw , "total hydro kw")

hydro_df = pd.DataFrame([["Hydro",postcodes, quantity, kw]],columns=['Type', 'Locations','System Quantity', 'KW'])
df_compare_systems = pd.concat([df_compare_systems, hydro_df])
df_compare_systems
20 unique hydro postcodes
20 total hydro units
50.7 total hydro kw
Out[ ]:
Type Locations System Quantity KW
0 Hydro 20 20 50.7
In [ ]:
sgu_solar = df_SSI_SGU_Solar["Small Unit Installation Postcode"].drop_duplicates()

postcodes = sgu_solar.shape[0]
quantity = sum(df_SSI_SGU_Solar["Previous Years (2001-2021) - Installation Quantity"])
kw = round(sum(df_SSI_SGU_Solar["Previous Years (2001-2021) - SGU Output Rated Output In kW"]),2)

print(postcodes, "unique solar postcodes")
print(quantity, "total solar units")
print(kw , "total solar kw")

solar_df = pd.DataFrame([["Solar",postcodes, quantity, kw]],columns=['Type', 'Locations','System Quantity', 'KW'])
df_compare_systems = pd.concat([df_compare_systems, solar_df])
df_compare_systems
2806 unique solar postcodes
3073708 total solar units
16519611.88 total solar kw
Out[ ]:
Type Locations System Quantity KW
0 Hydro 20 20 50.70
0 Solar 2806 3073708 16519611.88
In [ ]:
sgu_wind = df_SSI_SGU_Wind["Small Unit Installation Postcode"].drop_duplicates()

postcodes = sgu_wind.shape[0]
quantity = sum(df_SSI_SGU_Wind["Previous Years (2001-2021) - Installation Quantity"])
kw = round(sum(df_SSI_SGU_Wind["Previous Years (2001-2021) - SGU Output Rated Output In kW"]),2)

print(postcodes, "unique wind postcodes")
print(quantity, "total wind units")
print(kw , "total wind kw")

wind_df = pd.DataFrame([["Wind",postcodes, quantity, kw]],columns=['Type', 'Locations','System Quantity', 'KW'])
df_compare_systems = pd.concat([df_compare_systems, wind_df])
df_compare_systems
253 unique wind postcodes
424 total wind units
1468.67 total wind kw
Out[ ]:
Type Locations System Quantity KW
0 Hydro 20 20 50.70
0 Solar 2806 3073708 16519611.88
0 Wind 253 424 1468.67
In [ ]:
swh_hp = df_SSI_SWH_Air_source_heat_pump["Small Unit Installation Postcode"].drop_duplicates()

postcodes = swh_hp.shape[0]
quantity = sum(df_SSI_SWH_Air_source_heat_pump["Previous Years (2001-2021) - Installation Quantity"])
kw = 0 # no kw column

print(postcodes, "unique heat-pump postcodes")
print(quantity, "total heat-pump units")
print(kw , "total heat-pump kw")

hp_df = pd.DataFrame([["Heat-pump",postcodes, quantity, kw]],columns=['Type', 'Locations','System Quantity', 'KW'])
df_compare_systems = pd.concat([df_compare_systems, hp_df])
df_compare_systems
2610 unique heat-pump postcodes
365635 total heat-pump units
0 total heat-pump kw
Out[ ]:
Type Locations System Quantity KW
0 Hydro 20 20 50.70
0 Solar 2806 3073708 16519611.88
0 Wind 253 424 1468.67
0 Heat-pump 2610 365635 0.00
In [ ]:
swh_solar = df_SSI_SWH_Solar["Small Unit Installation Postcode"].drop_duplicates()

postcodes = swh_solar.shape[0]
quantity = sum(df_SSI_SWH_Solar["Previous Years (2001-2021) - Installation Quantity"])
kw = 0 # no kw column

print(postcodes, "unique swh-solar postcodes")
print(quantity, "total swh-solar units")
print(kw , "total swh-solar kw")

swh_solar_df = pd.DataFrame([["SWH-Solar",postcodes, quantity, kw]],columns=['Type', 'Locations','System Quantity', 'KW'])
df_compare_systems = pd.concat([df_compare_systems, swh_solar_df])
df_compare_systems
2964 unique swh-solar postcodes
1036390 total swh-solar units
0 total swh-solar kw
Out[ ]:
Type Locations System Quantity KW
0 Hydro 20 20 50.70
0 Solar 2806 3073708 16519611.88
0 Wind 253 424 1468.67
0 Heat-pump 2610 365635 0.00
0 SWH-Solar 2964 1036390 0.00
In [ ]:
df_compare_systems[["Locations","System Quantity","KW"]] = df_compare_systems[["Locations","System Quantity","KW"]].apply(pd.to_numeric)
Locations_graph = df_compare_systems.plot.bar(x='Type', y='Locations', rot=0)
System_Quantity_graph = df_compare_systems.plot.bar(x='Type', y='System Quantity', rot=0)
kW_graph = df_compare_systems.plot.bar(x='Type', y='KW', rot=0)

From this, there are two questions:¶

1- Do swh units not produce KW or is it simply not included in the datset?

2- Is the observed KW value already calculated based on the system quantity, or should the two be multiplied to find the overall kw production per postcode?

The main observations¶

1- Most locations that have solar panels will also have a solar water heating which makes intuitive sense.

2- In a given household you may have 1 heat pump and 1 SWH-solar but many panels which explains the quantity difference.

3- It's very rare for residential properties to have wind or hydro which explains the low numbers compared to solar.

4- Between 2001 and 2021 (The last 20 years), Australian residents installed around 16 Million kW of solar energy, 1500kW of Wind energy, and 50kW of hydro energy.

Creating a combined postcode dataset

In [ ]:
swh_solar_df = pd.DataFrame([["SWH-Solar",postcodes, quantity, kw]],columns=['Type', 'Locations','System Quantity', 'KW'])

df_compare_systems = pd.concat([df_compare_systems, swh_solar_df])

df_SSI_SGU_Hydro_small = df_SSI_SGU_Hydro[['Small Unit Installation Postcode','Previous Years (2001-2021) - Installation Quantity','Previous Years (2001-2021) - SGU Output Rated Output In kW']]
df_SSI_SGU_Solar_small = df_SSI_SGU_Solar[['Small Unit Installation Postcode','Previous Years (2001-2021) - Installation Quantity','Previous Years (2001-2021) - SGU Output Rated Output In kW']]
df_SSI_SGU_Wind_small = df_SSI_SGU_Wind[['Small Unit Installation Postcode','Previous Years (2001-2021) - Installation Quantity','Previous Years (2001-2021) - SGU Output Rated Output In kW']]
df_SSI_SWH_Air_source_heat_pump_small = df_SSI_SWH_Air_source_heat_pump[['Small Unit Installation Postcode','Previous Years (2001-2021) - Installation Quantity']]
df_SSI_SWH_Solar_small = df_SSI_SWH_Solar[['Small Unit Installation Postcode','Previous Years (2001-2021) - Installation Quantity']]

df_SSI_SGU_Hydro_small2= df_SSI_SGU_Hydro_small.copy(deep=True)
df_SSI_SGU_Solar_small2= df_SSI_SGU_Solar_small.copy(deep=True)
df_SSI_SGU_Wind_small2= df_SSI_SGU_Wind_small.copy(deep=True)
df_SSI_SWH_Air_source_heat_pump_small2= df_SSI_SWH_Air_source_heat_pump_small.copy(deep=True)
df_SSI_SWH_Solar_small2= df_SSI_SWH_Solar_small.copy(deep=True)

df_SSI_SWH_Air_source_heat_pump_small2["Previous Years (2001-2021) - SGU Output Rated Output In kW"] = None
df_SSI_SWH_Solar_small2["Previous Years (2001-2021) - SGU Output Rated Output In kW"] = None


df_SSI_SGU_Hydro_small2['Type'] = "Hydro"
df_SSI_SGU_Solar_small2['Type'] ="Solar"
df_SSI_SGU_Wind_small2['Type'] = "Wind"
df_SSI_SWH_Air_source_heat_pump_small2['Type'] = "SWG- Heat Pump"
df_SSI_SWH_Solar_small2['Type'] = "SWH Solar"

df_systems_combined = pd.concat([df_SSI_SGU_Hydro_small2,df_SSI_SGU_Solar_small2,df_SSI_SGU_Wind_small2,df_SSI_SWH_Air_source_heat_pump_small2,df_SSI_SWH_Solar_small2])

Cleaning up the Australian postcode reference dataset

In [ ]:
df_AU_postcode_dataset_small = df_AU_postcode_dataset[['Postcode','State','Long','Lat','LGA Region','Electorate Rating']]
df_AU_postcode_dataset_small = df_AU_postcode_dataset_small.drop_duplicates(subset='Postcode', keep="first")
df_AU_postcode_dataset_small
Out[ ]:
Postcode State Long Lat LGA Region Electorate Rating
0 200 ACT 149.119000 -35.277700 Unincorporated ACT NaN
2 800 NT 130.836680 -12.458684 Darwin Waterfront Precinct Inner Metropolitan
4 801 NT 130.836680 -12.458684 Darwin Waterfront Precinct Rural
5 803 NT 0.000000 0.000000 NaN NaN
6 804 NT 130.873315 -12.428017 Darwin NaN
... ... ... ... ... ... ...
18508 9013 QLD 152.823141 -27.603479 Ipswich NaN
18509 9015 QLD 152.823141 -27.603479 Ipswich NaN
18510 9464 QLD 153.074982 -27.397055 Brisbane NaN
18511 9726 QLD 153.412197 -28.008783 Gold Coast NaN
18512 9999 VIC 144.956776 -37.817403 Melbourne NaN

3173 rows × 6 columns

Merge in the postcode reference data with combined postcode dataset

In [ ]:
df_systems_combined_extra = df_systems_combined.merge(df_AU_postcode_dataset_small, how='left', left_on='Small Unit Installation Postcode', right_on='Postcode')

df_systems_combined_extra.isnull().sum()
Out[ ]:
Small Unit Installation Postcode                                 0
Previous Years (2001-2021) - Installation Quantity               0
Previous Years (2001-2021) - SGU Output Rated Output In kW    5574
Type                                                             0
Postcode                                                       217
State                                                          217
Long                                                           217
Lat                                                            217
LGA Region                                                     218
Electorate Rating                                             1021
dtype: int64

We expect kW output to have null values but not other merged columns which all suspiciously have 217 null values

In [ ]:
df_systems_combined_extra[df_systems_combined_extra.Postcode.isnull()]
Out[ ]:
Small Unit Installation Postcode Previous Years (2001-2021) - Installation Quantity Previous Years (2001-2021) - SGU Output Rated Output In kW Type Postcode State Long Lat LGA Region Electorate Rating
20 0 4 4.46 Solar NaN NaN NaN NaN NaN NaN
101 1848 1 1.04 Solar NaN NaN NaN NaN NaN NaN
510 2552 1 0.56 Solar NaN NaN NaN NaN NaN NaN
1502 4062 1 1.50 Solar NaN NaN NaN NaN NaN NaN
1503 4063 1 1.25 Solar NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
8645 7919 1 NaN SWH Solar NaN NaN NaN NaN NaN NaN
8646 7920 1 NaN SWH Solar NaN NaN NaN NaN NaN NaN
8648 8084 1 NaN SWH Solar NaN NaN NaN NaN NaN NaN
8650 9072 1 NaN SWH Solar NaN NaN NaN NaN NaN NaN
8651 9156 1 NaN SWH Solar NaN NaN NaN NaN NaN NaN

217 rows × 10 columns

After inspecting these 217 rows, we find that they don't correspond to legitimate postcodes and seem to be a data mistake caused by the author of the dataset, likely by prefilling consecutive postcode values.

Therefore we can remove these 217 rows

In [ ]:
df_systems_combined_cleaned = df_systems_combined_extra[~df_systems_combined_extra.Postcode.isnull()]
df_systems_combined_cleaned

#df_systems_combined_cleaned.to_excel("Output2.xlsx")
Out[ ]:
Small Unit Installation Postcode Previous Years (2001-2021) - Installation Quantity Previous Years (2001-2021) - SGU Output Rated Output In kW Type Postcode State Long Lat LGA Region Electorate Rating
0 837 1 4.00 Hydro 837.0 NT 131.016647 -12.704767 Litchfield Rural
1 2484 1 1.90 Hydro 2484.0 NSW 153.316480 -28.380590 Tweed Rural
2 2527 1 4.00 Hydro 2527.0 NSW 150.754592 -34.583539 Shellharbour Provincial
3 3409 1 1.05 Hydro 3409.0 VIC 141.863237 -36.724843 Horsham Rural
4 3691 1 1.00 Hydro 3691.0 VIC 147.880991 -36.027154 Towong Rural
... ... ... ... ... ... ... ... ... ... ...
8642 7469 2 NaN SWH Solar 7469.0 TAS 145.201632 -41.784391 Waratah-Wynyard Rural
8643 7470 3 NaN SWH Solar 7470.0 TAS 145.500566 -41.902129 West Coast Rural
8647 8002 1 NaN SWH Solar 8002.0 VIC 144.982207 -37.818517 Yarra NaN
8649 9000 1 NaN SWH Solar 9000.0 QLD 152.823141 -27.603479 Ipswich NaN
8652 9726 2 NaN SWH Solar 9726.0 QLD 153.412197 -28.008783 Gold Coast NaN

8436 rows × 10 columns

In [ ]:
m = folium.Map(location=[-38.2744, 140.7751], zoom_start=4)



for index, row in df_systems_combined_cleaned.iterrows():
    
        folium.Circle(
            location=[row['Lat'], row['Long']],
            popup= 'Postcode:' +str(row['Postcode']),
            tooltip=row['Postcode'],
            radius= 4000,
            color='green',
            fill=True,
            fill_color='black'
        ).add_to(m)
  
         
m
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
grouped = df_systems_combined_cleaned.groupby(['State','Type']).aggregate({
    'Small Unit Installation Postcode':'count',
    'Previous Years (2001-2021) - Installation Quantity': 'sum',
    'Previous Years (2001-2021) - SGU Output Rated Output In kW':'sum'
    })
grouped
Out[ ]:
Small Unit Installation Postcode Previous Years (2001-2021) - Installation Quantity Previous Years (2001-2021) - SGU Output Rated Output In kW
State Type
ACT SWG- Heat Pump 25 4991 0.000
SWH Solar 27 9870 0.000
Solar 28 39672 239567.907
Wind 2 2 15.000
NSW Hydro 2 2 5.900
SWG- Heat Pump 638 86488 0.000
SWH Solar 656 217645 0.000
Solar 661 758663 4294017.185
Wind 83 171 1053.800
NT Hydro 1 1 4.000
SWG- Heat Pump 30 1894 0.000
SWH Solar 45 20524 0.000
Solar 46 19998 150530.170
QLD Hydro 2 2 2.000
SWG- Heat Pump 406 104292 0.000
SWH Solar 438 217038 0.000
Solar 441 850776 4519864.468
Wind 25 43 31.550
SA Hydro 1 1 1.500
SWG- Heat Pump 328 28898 0.000
SWH Solar 338 46887 0.000
Solar 342 339789 1831194.238
Wind 41 69 103.950
TAS Hydro 8 8 32.480
SWG- Heat Pump 113 10682 0.000
SWH Solar 114 7510 0.000
Solar 118 44188 217249.362
Wind 21 26 68.550
VIC Hydro 5 5 3.750
SWG- Heat Pump 701 98073 0.000
SWH Solar 708 340655 0.000
Solar 711 601962 3262412.246
Wind 54 75 131.550
WA Hydro 1 1 1.075
SWG- Heat Pump 365 30301 0.000
SWH Solar 441 175275 0.000
Solar 443 418640 2004744.359
Wind 27 38 64.270
In [ ]:
grouped = grouped.reset_index()


plt.figure(figsize=(20, 10))


sns.set_palette("husl")


plt.subplot(1, 3, 2)
sns.barplot(data=grouped, x='State', y='Previous Years (2001-2021) - Installation Quantity', hue='Type')
plt.title('Previous Years (2001-2021) - Installation Quantity by State and Type')
plt.xticks(rotation=45)


plt.subplot(1, 3, 3)
sns.barplot(data=grouped, x='State', y='Previous Years (2001-2021) - SGU Output Rated Output In kW', hue='Type')
plt.title('Previous Years (2001-2021) - SGU Output Rated Output In kW by State and Type')
plt.xticks(rotation=45)


plt.tight_layout()


plt.show()

This gives us a good comparisons for solar,Solar water, and total kW for each sate but not for wind or hydro.

In [ ]:
df_systems_combined_cleaned_wind = df_systems_combined_cleaned.loc[df_systems_combined_cleaned['Type'] == "Wind"]

grouped_wind = df_systems_combined_cleaned_wind.groupby(['State','Type']).aggregate({
    'Small Unit Installation Postcode':'count',
    'Previous Years (2001-2021) - Installation Quantity': 'sum',
    'Previous Years (2001-2021) - SGU Output Rated Output In kW':'sum'
    })

df_systems_combined_cleaned_hydro = df_systems_combined_cleaned.loc[df_systems_combined_cleaned['Type'] == "Hydro"]

grouped_hydro = df_systems_combined_cleaned_hydro.groupby(['State','Type']).aggregate({
    'Small Unit Installation Postcode':'count',
    'Previous Years (2001-2021) - Installation Quantity': 'sum',
    'Previous Years (2001-2021) - SGU Output Rated Output In kW':'sum'
    })
In [ ]:
grouped_wind = grouped_wind.reset_index()
grouped_hydro = grouped_hydro.reset_index()

plt.figure(figsize=(20, 10))
sns.set_palette("husl")

plt.subplot(1, 3, 2)
sns.barplot(data=grouped_wind, x='State', y='Previous Years (2001-2021) - Installation Quantity', hue='Type')
plt.title('Previous Years (2001-2021) - Installation Quantity by State and Type')
plt.xticks(rotation=45)

plt.subplot(1, 3, 3)
sns.barplot(data=grouped_wind, x='State', y='Previous Years (2001-2021) - SGU Output Rated Output In kW', hue='Type')
plt.title('Previous Years (2001-2021) - SGU Output Rated Output In kW by State and Type')
plt.xticks(rotation=45)


plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


grouped_wind = grouped_wind.reset_index()
grouped_hydro = grouped_hydro.reset_index()

plt.figure(figsize=(20, 10))
sns.set_palette("husl")

plt.subplot(1, 3, 2)
sns.barplot(data=grouped_hydro, x='State', y='Previous Years (2001-2021) - Installation Quantity', hue='Type')
plt.title('Previous Years (2001-2021) - Installation Quantity by State and Type')
plt.xticks(rotation=45)

plt.subplot(1, 3, 3)
sns.barplot(data=grouped_hydro, x='State', y='Previous Years (2001-2021) - SGU Output Rated Output In kW', hue='Type')
plt.title('Previous Years (2001-2021) - SGU Output Rated Output In kW by State and Type')
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()
In [ ]:
df_battery_installations

df_battery_installations2 = df_battery_installations.melt(id_vars=['Installation year'], value_vars=df_battery_installations.columns[1:-1], var_name='State', value_name='Total Installations')


df_battery_installations2 = pd.DataFrame(df_battery_installations2)
df_battery_installations2 = df_battery_installations2.replace(',','', regex=True)
df_battery_installations2['Total Installations'] = pd.to_numeric(df_battery_installations2['Total Installations'], errors='coerce')


df_battery_installations3 = df_battery_installations2.groupby(by='State').aggregate({
    'Total Installations':'sum',
    }).reset_index()


df_battery_installations3.info()
print(df_battery_installations3)

plt.figure(figsize=(20, 10))
sns.set_palette("husl")

plt.subplot(1, 3, 2)
sns.barplot(data=df_battery_installations3, x='State', y='Total Installations')
plt.title('battery installations')
plt.xticks(rotation=45)

plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   State                8 non-null      object
 1   Total Installations  8 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 256.0+ bytes
  State  Total Installations
0   ACT                 3410
1  NSW                 16717
2   NT                  1494
3  QLD                 12933
4   SA                 16802
5  TAS                   993
6  VIC                 16396
7    WA                 7372

Time series¶

In [ ]:
#print(df_battery_installations2.to_markdown())

plt.figure(figsize=(20, 10))
sns.set_palette("husl")

plt.subplot(1, 3, 2)
sns.lineplot(data=df_battery_installations2, x='Installation year', y='Total Installations',hue='State')
plt.title('battery installations')
plt.xticks(rotation=45)

plt.xticks(rotation=45)
#plt.yscale("log")
plt.tight_layout()
plt.show()
In [ ]:
plt.figure(figsize=(20, 10))
sns.set_palette("husl")

plt.subplot(1, 3, 2)

# Smooth trend lines
sns.lineplot(data=df_battery_installations2, x='Installation year', y='Total Installations', hue='State', estimator='mean', ci=None)

plt.title('Battery Installations')
plt.xticks(rotation=45)

plt.yscale("log")

plt.tight_layout()
plt.show()
C:\Users\danie\AppData\Local\Temp\ipykernel_21944\3895074744.py:7: FutureWarning: 

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df_battery_installations2, x='Installation year', y='Total Installations', hue='State', estimator='mean', ci=None)

Merge the dataframes that have monthly columns, wind and hydro do not so just leaves solar and the 2 SWH dataframes

In [ ]:
# stage 1) prep dataframes for combine

df_SSI_SWH_Air_source_heat_pump["Previous Years (2001-2021) - SGU Output Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Jan 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Feb 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Mar 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Apr 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["May 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Jun 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Jul 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Aug 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Sep 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Oct 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Nov 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Dec 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Jan 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Feb 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Mar 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Apr 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["May 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["Jun 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Air_source_heat_pump["SGU Rated Output In kW Total"] = None

df_SSI_SWH_Solar["Previous Years (2001-2021) - SGU Output Rated Output In kW"] = None
df_SSI_SWH_Solar["Jan 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Feb 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Mar 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Apr 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["May 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Jun 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Jul 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Aug 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Sep 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Oct 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Nov 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Dec 2022 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Jan 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Feb 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Mar 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Apr 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["May 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["Jun 2023 - SGU Rated Output In kW"] = None
df_SSI_SWH_Solar["SGU Rated Output In kW Total"] = None

df_SSI_SWH_Solar['Type'] ="SWH-Solar"
df_SSI_SWH_Air_source_heat_pump['Type'] = "SWG- Heat Pump"
df_SSI_SGU_Solar['Type'] = "Solar"

print(df_SSI_SWH_Air_source_heat_pump.shape)
print(df_SSI_SWH_Solar.shape)
print(df_SSI_SGU_Solar.shape)

df_SSI_SGU_Solar.rename(columns={' Installations Quantity Total ': 'Installations Quantity Total'}, inplace=True)

df_systems_monthly = pd.concat([df_SSI_SWH_Air_source_heat_pump,df_SSI_SWH_Solar,df_SSI_SGU_Solar])
print(df_systems_monthly.shape)


# stage 2 ) Add in postcode data

df_systems_monthly = df_systems_monthly.merge(df_AU_postcode_dataset_small, how='left', left_on='Small Unit Installation Postcode', right_on='Postcode')
df_systems_monthly.isnull().sum()

# stage 3) Remove blank rows

df_systems_monthly = df_systems_monthly[~df_systems_monthly.Postcode.isnull()]
df_systems_monthly
df_systems_monthly.isnull().sum()
(2610, 42)
(2964, 42)
(2806, 42)
(8380, 42)
Out[ ]:
Small Unit Installation Postcode                                 0
Previous Years (2001-2021) - Installation Quantity               0
Jan 2022 - Installations Quantity                                0
Feb 2022 - Installations Quantity                                0
Mar 2022 - Installations Quantity                                0
Apr 2022 - Installations Quantity                                0
May 2022 - Installations Quantity                                0
Jun 2022 - Installations Quantity                                0
Jul 2022 - Installations Quantity                                0
Aug 2022 - Installations Quantity                                0
Sep 2022 - Installations Quantity                                0
Oct 2022 - Installations Quantity                                0
Nov 2022 - Installations Quantity                                0
Dec 2022 - Installations Quantity                                0
Jan 2023 - Installations Quantity                                0
Feb 2023 - Installations Quantity                                0
Mar 2023 - Installations Quantity                                0
Apr 2023 - Installations Quantity                                0
May 2023 - Installations Quantity                                0
Jun 2023 - Installations Quantity                                0
Installations Quantity Total                                     0
Previous Years (2001-2021) - SGU Output Rated Output In kW    5373
Jan 2022 - SGU Rated Output In kW                             5373
Feb 2022 - SGU Rated Output In kW                             5373
Mar 2022 - SGU Rated Output In kW                             5373
Apr 2022 - SGU Rated Output In kW                             5373
May 2022 - SGU Rated Output In kW                             5373
Jun 2022 - SGU Rated Output In kW                             5373
Jul 2022 - SGU Rated Output In kW                             5373
Aug 2022 - SGU Rated Output In kW                             5373
Sep 2022 - SGU Rated Output In kW                             5373
Oct 2022 - SGU Rated Output In kW                             5373
Nov 2022 - SGU Rated Output In kW                             5373
Dec 2022 - SGU Rated Output In kW                             5373
Jan 2023 - SGU Rated Output In kW                             5373
Feb 2023 - SGU Rated Output In kW                             5373
Mar 2023 - SGU Rated Output In kW                             5373
Apr 2023 - SGU Rated Output In kW                             5373
May 2023 - SGU Rated Output In kW                             5373
Jun 2023 - SGU Rated Output In kW                             5373
SGU Rated Output In kW Total                                  5373
Type                                                             0
Postcode                                                         0
State                                                            0
Long                                                             0
Lat                                                              0
LGA Region                                                       1
Electorate Rating                                              789
dtype: int64
In [ ]:
# df_systems_monthly
df_systems_monthly.info()
# plt.figure(figsize=(20, 10))
# sns.set_palette("husl")

# plt.subplot(1, 3, 2)

# sns.lineplot(data=df_battery_installations2, x='Installation year', y='Total Installations', hue='State', estimator='mean', ci=None)

# plt.title('Battery Installations')
# plt.xticks(rotation=45)

# plt.yscale("log")

# plt.tight_layout()
# plt.show()
<class 'pandas.core.frame.DataFrame'>
Index: 8163 entries, 1 to 8376
Data columns (total 48 columns):
 #   Column                                                      Non-Null Count  Dtype  
---  ------                                                      --------------  -----  
 0   Small Unit Installation Postcode                            8163 non-null   int64  
 1   Previous Years (2001-2021) - Installation Quantity          8163 non-null   int64  
 2   Jan 2022 - Installations Quantity                           8163 non-null   int64  
 3   Feb 2022 - Installations Quantity                           8163 non-null   int64  
 4   Mar 2022 - Installations Quantity                           8163 non-null   int64  
 5   Apr 2022 - Installations Quantity                           8163 non-null   int64  
 6   May 2022 - Installations Quantity                           8163 non-null   int64  
 7   Jun 2022 - Installations Quantity                           8163 non-null   int64  
 8   Jul 2022 - Installations Quantity                           8163 non-null   int64  
 9   Aug 2022 - Installations Quantity                           8163 non-null   int64  
 10  Sep 2022 - Installations Quantity                           8163 non-null   int64  
 11  Oct 2022 - Installations Quantity                           8163 non-null   int64  
 12  Nov 2022 - Installations Quantity                           8163 non-null   int64  
 13  Dec 2022 - Installations Quantity                           8163 non-null   int64  
 14  Jan 2023 - Installations Quantity                           8163 non-null   int64  
 15  Feb 2023 - Installations Quantity                           8163 non-null   int64  
 16  Mar 2023 - Installations Quantity                           8163 non-null   int64  
 17  Apr 2023 - Installations Quantity                           8163 non-null   int64  
 18  May 2023 - Installations Quantity                           8163 non-null   int64  
 19  Jun 2023 - Installations Quantity                           8163 non-null   int64  
 20  Installations Quantity Total                                8163 non-null   int64  
 21  Previous Years (2001-2021) - SGU Output Rated Output In kW  2790 non-null   float64
 22  Jan 2022 - SGU Rated Output In kW                           2790 non-null   float64
 23  Feb 2022 - SGU Rated Output In kW                           2790 non-null   float64
 24  Mar 2022 - SGU Rated Output In kW                           2790 non-null   float64
 25  Apr 2022 - SGU Rated Output In kW                           2790 non-null   float64
 26  May 2022 - SGU Rated Output In kW                           2790 non-null   float64
 27  Jun 2022 - SGU Rated Output In kW                           2790 non-null   float64
 28  Jul 2022 - SGU Rated Output In kW                           2790 non-null   float64
 29  Aug 2022 - SGU Rated Output In kW                           2790 non-null   float64
 30  Sep 2022 - SGU Rated Output In kW                           2790 non-null   float64
 31  Oct 2022 - SGU Rated Output In kW                           2790 non-null   float64
 32  Nov 2022 - SGU Rated Output In kW                           2790 non-null   float64
 33  Dec 2022 - SGU Rated Output In kW                           2790 non-null   float64
 34  Jan 2023 - SGU Rated Output In kW                           2790 non-null   float64
 35  Feb 2023 - SGU Rated Output In kW                           2790 non-null   float64
 36  Mar 2023 - SGU Rated Output In kW                           2790 non-null   float64
 37  Apr 2023 - SGU Rated Output In kW                           2790 non-null   float64
 38  May 2023 - SGU Rated Output In kW                           2790 non-null   float64
 39  Jun 2023 - SGU Rated Output In kW                           2790 non-null   float64
 40  SGU Rated Output In kW Total                                2790 non-null   float64
 41  Type                                                        8163 non-null   object 
 42  Postcode                                                    8163 non-null   float64
 43  State                                                       8163 non-null   object 
 44  Long                                                        8163 non-null   float64
 45  Lat                                                         8163 non-null   float64
 46  LGA Region                                                  8162 non-null   object 
 47  Electorate Rating                                           7374 non-null   object 
dtypes: float64(23), int64(21), object(4)
memory usage: 3.1+ MB
In [ ]: